Script for analysing single cell RNA-seq data from Neutrophil mouse study # experimental arms #
control (vehicle)
CXCR inhibitor
Bor, CXCR, + DEX
Dex, Bor
short names:
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
##
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
##
## intersect, t
library(magrittr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks magrittr::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.22.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(glmGamPoi)
##
## Attaching package: 'glmGamPoi'
##
## The following object is masked from 'package:dplyr':
##
## vars
##
## The following object is masked from 'package:ggplot2':
##
## vars
library(RColorBrewer)
library(tibble)
library(ggplot2)
library(dplyr)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ComplexHeatmap':
##
## add_heatmap
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
library(monocle3)
## Loading required package: Biobase
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
##
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
##
## The following object is masked from 'package:SeuratObject':
##
## intersect
##
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
## table, tapply, union, unique, unsplit, which.max, which.min
##
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
##
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
##
## The following object is masked from 'package:dplyr':
##
## count
##
##
## Attaching package: 'MatrixGenerics'
##
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
##
## The following object is masked from 'package:Biobase':
##
## rowMedians
##
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
##
## The following object is masked from 'package:plotly':
##
## rename
##
## The following objects are masked from 'package:lubridate':
##
## second, second<-
##
## The following objects are masked from 'package:dplyr':
##
## first, rename
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## The following object is masked from 'package:utils':
##
## findMatches
##
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
##
## The following object is masked from 'package:plotly':
##
## slice
##
## The following object is masked from 'package:lubridate':
##
## %within%
##
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## The following object is masked from 'package:purrr':
##
## reduce
##
## The following object is masked from 'package:sp':
##
## %over%
##
## Loading required package: GenomeInfoDb
##
## Attaching package: 'GenomicRanges'
##
## The following object is masked from 'package:magrittr':
##
## subtract
##
##
## Attaching package: 'SummarizedExperiment'
##
## The following object is masked from 'package:Seurat':
##
## Assays
##
## The following object is masked from 'package:SeuratObject':
##
## Assays
##
##
## Attaching package: 'monocle3'
##
## The following objects are masked from 'package:Biobase':
##
## exprs, fData, fData<-, pData, pData<-
library(devtools)
## Loading required package: usethis
library(SeuratWrappers)
library(EnhancedVolcano)
## Loading required package: ggrepel
library(stringr)
library(patchwork)
##
## Attaching package: 'patchwork'
##
## The following object is masked from 'package:cowplot':
##
## align_plots
##### #####
#####. If you are curious about my parameters #####
#####
#####
#### c1 ####
# c1.dat = Read10X(data.dir = "/samurlab2/RawData/RoswellMouse/RS-04392810_PBS_RS-04390274_count/outs/filtered_feature_bc_matrix/")
# c1 = CreateSeuratObject(c1.dat)
#
# c1
# c1[["percent.mt"]] = PercentageFeatureSet(c1, pattern = "^mt-")
# c1[["percent.mt"]]
#
# VlnPlot(c1, features = c("nFeature_RNA", "nCount_RNA","percent.mt"), ncol = 3, pt.size = 0, cols = RColorBrewer::brewer.pal(n = 3, name = "Spectral"))
#
# plot1 = FeatureScatter(c1, feature1 = "nCount_RNA", feature2 = "percent.mt")
# plot2 = FeatureScatter(c1, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",group.by = "sample")
# plot1+plot2
#
#
# meta = c1@meta.data
# meta$cells = rownames(meta)
#
# meta = meta %>%
# dplyr::rename(seq_folder = orig.ident,
# nUMI = nCount_RNA,
# nGene = nFeature_RNA)
# View(meta)
#
# #number of cells included in sample
# meta %>%
# ggplot(aes(x=nGene, fill = seq_folder)) +
# geom_bar()+
# theme_classic()+
# theme(axis.text.x = element_text(angle=45, vjust = 1, hjust = 1))+
# theme(plot.title=element_text(hjust=0.5, face="bold"))+
# ggtitle("Number of Cells")
#
# # Visualize the number UMIs/transcripts per cell to search for outliers
# meta %>%
# ggplot(aes(color=seq_folder, x=nUMI, fill= seq_folder)) +
# geom_density(alpha = 0.2) +
# scale_x_log10() +
# theme_classic() +
# ylab("Cell density") +
# geom_vline(xintercept = 8361.788)
#
# #visualize the distribution of genes detected per cell via histogram
# meta %>%
# ggplot(aes(color=sample, x=nGene, fill= sample)) +
# geom_density(alpha = 0.2) +
# theme_classic() +
# scale_x_log10() +
# geom_vline(xintercept = 1184)+
# scale_x_continuous(limits=c(100,5000))
#
#
# c1 = subset(c1, subset = nFeature_RNA>300 & nFeature_RNA<8000 & percent.mt <5)
# c1 = NormalizeData(c1, normalization.method = "LogNormalize", scale.factor = 1e4)
#
# c1 = FindVariableFeatures(c1, selection.method = "vst", nfeatures = 2000)
# c1.10 = head(VariableFeatures(c1),10)
# c1.plot = VariableFeaturePlot(c1)
# c1.top10.p = LabelPoints(plot = c1.plot, points = c1.10, repel = T, ynudge = 0, xnudge = 0)
#
# all.genes = rownames(c1)
# c1 = ScaleData(c1, features = all.genes)
#
# c1 = RunPCA(object = c1, features = VariableFeatures(object=c1), genes.print = 10)
# ElbowPlot(c1)
#
# # c1 = JackStraw(c1, num.replicate = 100)
# # c1 = ScoreJackStraw(c1, dims=1:15)
# # JackStrawPlot(c1, dims = 1:15)
#
#
#
# c1 = FindNeighbors(c1, dims = 1:30)
# c1 = FindClusters(c1, resolution = .4)
# c1 = RunUMAP(c1, dims = 1:30, n.components = 3)
# DimPlot(c1, reduction = "umap", label=T)
#
# c1 <- RunAzimuth(c1, reference = "bonemarrowref")
#
# c1$predicted.celltype.l1
#
# DimPlot(c1, reduction = "umap", group.by = "predicted.celltype.l1", label=T)
# DimPlot(c1, reduction = "umap", group.by = "predicted.celltype.l2", label=T)
# DimPlot(c1, reduction = "umap", group.by = "predicted.celltype.l3", label=T)
#
# c1@active.ident = as.factor(c1$predicted.celltype.l1)
#
# c1@assays$RNA$data = c1@assays$prediction.score.celltype.l1$data
#
# saveRDS(c1, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c1.rds")
# c1 = readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c1.rds')
#
# ### c2 ####
# c2.dat = Read10X(data.dir = "/samurlab2/RawData/RoswellMouse/RS-04392811_CXCR2_RS-04390275_count/outs/filtered_feature_bc_matrix/")
# c2 = CreateSeuratObject(c2.dat)
#
# c2@active.assay = "RNA"
# c2[["percent.mt"]] = PercentageFeatureSet(c2, pattern = "^mt-")
# c2[["percent.mt"]]
#
# VlnPlot(c2, features = c("nFeature_RNA", "nCount_RNA","percent.mt"), ncol = 3, pt.size = 0, cols = RColorBrewer::brewer.pal(n = 3, name = "Spectral"))
#
# plot1 = FeatureScatter(c2, feature1 = "nCount_RNA", feature2 = "percent.mt")
# plot2 = FeatureScatter(c2, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",group.by = "sample")
# plot1+plot2
#
#
# meta = c2@meta.data
# meta$cells = rownames(meta)
#
# meta = meta %>%
# dplyr::rename(seq_folder = orig.ident,
# nUMI = nCount_RNA,
# nGene = nFeature_RNA)
# View(meta)
#
# #number of cells included in sample
# meta %>%
# ggplot(aes(x=nGene, fill = seq_folder)) +
# geom_bar()+
# theme_classic()+
# theme(axis.text.x = element_text(angle=45, vjust = 1, hjust = 1))+
# theme(plot.title=element_text(hjust=0.5, face="bold"))+
# ggtitle("Number of Cells")
#
# # Visualize the number UMIs/transcripts per cell to search for outliers
# meta %>%
# ggplot(aes(color=seq_folder, x=nUMI, fill= seq_folder)) +
# geom_density(alpha = 0.2) +
# scale_x_log10() +
# theme_classic() +
# ylab("Cell density") +
# geom_vline(xintercept = 8361.788)
#
# #visualize the distribution of genes detected per cell via histogram
# meta %>%
# ggplot(aes(color=sample, x=nGene, fill= sample)) +
# geom_density(alpha = 0.2) +
# theme_classic() +
# scale_x_log10() +
# geom_vline(xintercept = 1184)+
# scale_x_continuous(limits=c(100,5000))
#
#
# c2 = subset(c2, subset = nFeature_RNA>300 & nFeature_RNA<8000 & percent.mt <5)
# c2 = NormalizeData(c2, normalization.method = "LogNormalize", scale.factor = 1e4)
#
# c2 = FindVariableFeatures(c2, selection.method = "vst", nfeatures = 2000)
# c2.10 = head(VariableFeatures(c2),10)
# c2.plot = VariableFeaturePlot(c2)
# c2.top10.p = LabelPoints(plot = c2.plot, points = c2.10, repel = T, ynudge = 0, xnudge = 0)
#
# all.genes = rownames(c2)
# c2 = ScaleData(c2, features = all.genes)
#
# c2 = RunPCA(object = c2, features = VariableFeatures(object=c2), genes.print = 10)
# ElbowPlot(c2)
#
# # c2 = JackStraw(c2, num.replicate = 100)
# # c2 = ScoreJackStraw(c2, dims=1:15)
# # JackStrawPlot(c2, dims = 1:15)
#
# c2 = FindNeighbors(c2, dims = 1:30)
# c2 = FindClusters(c2, resolution = .4)
# c2 = RunUMAP(c2, dims = 1:30, n.components = 3)
# DimPlot(c2, reduction = "umap", label=T)
#
# library(Azimuth)
# c2_anno <- RunAzimuth(c2, reference = "bonemarrowref")
#
# c2_anno$predicted.celltype.l1
#
# DimPlot(c2_anno, reduction = "umap", group.by = "predicted.celltype.l1", label=T)
# DimPlot(c2_anno, reduction = "umap", group.by = "predicted.celltype.l2", label=T)
#
# c2_anno@active.ident = as.factor(c2_anno$predicted.celltype.l1)
#
# saveRDS(c2, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c2.rds")
# c2_unlabeled = readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c2.rds')
#
# saveRDS(c2_anno, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c2_anno.rds")
# c2 = readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c2_anno.rds')
#
#
# ### c3 ####
# c3.dat = Read10X(data.dir = "/samurlab2/RawData/RoswellMouse/RS-04392812_BTZ-Dex-B-D_RS-04390276_count/outs/filtered_feature_bc_matrix/")
# c3 = CreateSeuratObject(c3.dat, assay = "RNA")
#
# c3[["percent.mt"]] = PercentageFeatureSet(c3, pattern = "^mt-")
# c3[["percent.mt"]]
#
#
# VlnPlot(c3, features = c("nFeature_RNA", "nCount_RNA","percent.mt"), ncol = 3, pt.size = 0, cols = RColorBrewer::brewer.pal(n = 3, name = "Spectral"))
#
# plot1 = FeatureScatter(c3, feature1 = "nCount_RNA", feature2 = "percent.mt")
# plot2 = FeatureScatter(c3, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",group.by = "sample")
# plot1+plot2
#
#
# meta = c3@meta.data
# meta$cells = rownames(meta)
#
# meta = meta %>%
# dplyr::rename(seq_folder = orig.ident,
# nUMI = nCount_RNA,
# nGene = nFeature_RNA)
# View(meta)
#
# #number of cells included in sample
# meta %>%
# ggplot(aes(x=nGene, fill = seq_folder)) +
# geom_bar()+
# theme_classic()+
# theme(axis.text.x = element_text(angle=45, vjust = 1, hjust = 1))+
# theme(plot.title=element_text(hjust=0.5, face="bold"))+
# ggtitle("Number of Cells")
#
# # Visualize the number UMIs/transcripts per cell to search for outliers
# meta %>%
# ggplot(aes(color=seq_folder, x=nUMI, fill= seq_folder)) +
# geom_density(alpha = 0.2) +
# scale_x_log10() +
# theme_classic() +
# ylab("Cell density") +
# geom_vline(xintercept = 8361.788)
#
# #visualize the distribution of genes detected per cell via histogram
# meta %>%
# ggplot(aes(color=sample, x=nGene, fill= sample)) +
# geom_density(alpha = 0.2) +
# theme_classic() +
# scale_x_log10() +
# geom_vline(xintercept = 1184)+
# scale_x_continuous(limits=c(100,5000))
#
#
# c3 = subset(c3, subset = nFeature_RNA>300 & nFeature_RNA<8000 & percent.mt <5)
# c3 = NormalizeData(c3, normalization.method = "LogNormalize", scale.factor = 1e4)
#
# c3 = FindVariableFeatures(c3, selection.method = "vst", nfeatures = 2000)
# c3.10 = head(VariableFeatures(c3),10)
# c3.plot = VariableFeaturePlot(c3)
# c3.top10.p = LabelPoints(plot = c3.plot, points = c3.10, repel = T, ynudge = 0, xnudge = 0)
#
# all.genes = rownames(c3)
# c3 = ScaleData(c3, features = all.genes)
#
# c3 = RunPCA(object = c3, features = VariableFeatures(object=c3), genes.print = 10)
# ElbowPlot(c3)
#
# # c3 = JackStraw(c3, num.replicate = 100)
# # c3 = ScoreJackStraw(c3, dims=1:15)
# # JackStrawPlot(c3, dims = 1:15)
#
#
#
# c3 = FindNeighbors(c3, dims = 1:30)
# c3 = FindClusters(c3, resolution = .4)
# c3 = RunUMAP(c3, dims = 1:30, n.components = 3)
# DimPlot(c3, reduction = "umap", label=T)
#
# c_anno <- RunAzimuth(c3, reference = "bonemarrowref")
#
# c_anno$predicted.celltype.l1
#
# DimPlot(c_anno, reduction = "umap", group.by = "predicted.celltype.l1", label=T)
# DimPlot(c_anno, reduction = "umap", group.by = "predicted.celltype.l2", label=T)
#
# c_anno@active.ident = as.factor(c_anno$predicted.celltype.l1)
#
# c3_anno = c_anno
# saveRDS(c3, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c3.rds")
# c3_unlabeled = readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c3.rds')
#
# saveRDS(c_anno, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c3_anno.rds")
# c3 = readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c3_anno.rds')
#
#
# ### c4 ####
# c4.dat = Read10X(data.dir = "/samurlab2/RawData/RoswellMouse/RS-04392813_Triple_RS-04390277_count/outs/filtered_feature_bc_matrix/")
# c4 = CreateSeuratObject(c4.dat, assay = "RNA")
#
# c4[["percent.mt"]] = PercentageFeatureSet(c4, pattern = "^mt-")
# c4[["percent.mt"]]
#
#
# VlnPlot(c4, features = c("nFeature_RNA", "nCount_RNA","percent.mt"), ncol = 3, pt.size = 0, cols = RColorBrewer::brewer.pal(n = 3, name = "Spectral"))
#
# plot1 = FeatureScatter(c4, feature1 = "nCount_RNA", feature2 = "percent.mt")
# plot2 = FeatureScatter(c4, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",group.by = "sample")
# plot1+plot2
#
#
# meta = c4@meta.data
# meta$cells = rownames(meta)
#
# meta = meta %>%
# dplyr::rename(seq_folder = orig.ident,
# nUMI = nCount_RNA,
# nGene = nFeature_RNA)
# View(meta)
#
# #number of cells included in sample
# meta %>%
# ggplot(aes(x=nGene, fill = seq_folder)) +
# geom_bar()+
# theme_classic()+
# theme(axis.text.x = element_text(angle=45, vjust = 1, hjust = 1))+
# theme(plot.title=element_text(hjust=0.5, face="bold"))+
# ggtitle("Number of Cells")
#
# # Visualize the number UMIs/transcripts per cell to search for outliers
# meta %>%
# ggplot(aes(color=seq_folder, x=nUMI, fill= seq_folder)) +
# geom_density(alpha = 0.2) +
# scale_x_log10() +
# theme_classic() +
# ylab("Cell density") +
# geom_vline(xintercept = 8361.788)
#
# #visualize the distribution of genes detected per cell via histogram
# meta %>%
# ggplot(aes(color=sample, x=nGene, fill= sample)) +
# geom_density(alpha = 0.2) +
# theme_classic() +
# scale_x_log10() +
# geom_vline(xintercept = 1184)+
# scale_x_continuous(limits=c(100,5000))
#
#
# c4 = subset(c4, subset = nFeature_RNA>300 & nFeature_RNA<8000 & percent.mt <5)
# c4 = NormalizeData(c4, normalization.method = "LogNormalize", scale.factor = 1e4)
#
# c4 = FindVariableFeatures(c4, selection.method = "vst", nfeatures = 2000)
# c4.10 = head(VariableFeatures(c4),10)
# c4.plot = VariableFeaturePlot(c4)
# c4.top10.p = LabelPoints(plot = c4.plot, points = c4.10, repel = T, ynudge = 0, xnudge = 0)
#
# all.genes = rownames(c4)
# c4 = ScaleData(c4, features = all.genes)
#
# c4 <- RunPCA(object = c4, features = VariableFeatures(object=c4), genes.print = 10)
# ElbowPlot(c4)
#
# # c4 = JackStraw(c4, num.replicate = 100)
# # c4 = ScoreJackStraw(c4, dims=1:15)
# # JackStrawPlot(c4, dims = 1:15)
#
#
#
# c4 <- FindNeighbors(c4, dims = 1:30)
# c4 <- FindClusters(c4, resolution = .4)
# c4 <- RunUMAP(c4, dims = 1:30, n.components = 3)
# DimPlot(c4, reduction = "umap", label=T)
#
# library(Azimuth)
# c4_anno <- RunAzimuth(c4, reference = "bonemarrowref")
#
# c4_anno$predicted.celltype.l1
#
# DimPlot(c4_anno, group.by = "predicted.celltype.l1", label=T)
# DimPlot(c4_anno, group.by = "predicted.celltype.l2", label=T)
#
# c4_anno@active.ident = as.factor(c4_anno$predicted.celltype.l1)
#
# saveRDS(c4, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c4.rds")
# c4_unlabeled = readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c4.rds')
#
# saveRDS(c4_anno, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c4_anno.rds")
# c4 <- readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c4_anno.rds')
c1 <- readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c1.rds')
c2 <- readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c2_anno.rds')
c3 <- readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c3_anno.rds')
c4 <- readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c4_anno.rds')
c1$sample = "c1"
c2$sample = "c2"
c3$sample = "c3"
c4$sample = "c4"
merge = merge(x = c1, y = c(c2,c3,c4))
## Warning: Some cell names are duplicated across objects provided. Renaming to
## enforce unique cell names.
merge = JoinLayers(merge)
merge$sample <- as.factor(merge$sample)
VlnPlot(merge, features = c("nFeature_RNA", "nCount_RNA","percent.mt"), ncol = 3, pt.size = 0)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#### statistics ####
merge <- NormalizeData(merge, normalization.method = "LogNormalize", scale.factor = 1e4)
## Normalizing layer: counts
merge <- FindVariableFeatures(merge, selection.method = "vst", nFeatures = 3000)
## Finding variable features for layer counts
all.genes <- rownames(merge)
merge <- ScaleData(merge, features = all.genes)
## Centering and scaling data matrix
merge <- RunPCA(merge, features = VariableFeatures(merge), genes.print = 25)
## PC_ 1
## Positive: CLEC4D, G0S2, SLC7A11, CAMP, IL1B, FCGR3B, BASP1, ACVRL1, THBS1, CD63
## GDPD3, EGR1, INHBA, VAMP5, ISG15, CEBPE, ACOD1, CTSE, MGST2, FGL2
## GCA, PTGS2, OLFM4, AGPAT2, STEAP4, SOCS3, IFIT3, OASL, SERPINB1, HMOX1
## Negative: RPL10A, RPL3, RPL36A, RPS18, RPS2, RPS17, RPS19, RPL32, RACK1, NME2
## RPS5, RPL15, RANBP1, RPL22, RPL13, MT-ND1, SETSIP, RPS28, KRTCAP2, RPS4X
## YBX3, SNRPD1, RPS8, RPLP0, RPS6, RPL28, NAP1L1, RPL8, ANP32B, TUBA1B
## PC_ 2
## Positive: SELENOW, FYN, GIMAP6, IL7R, ABLIM1, KLRD1, CD7, GIMAP4, GIMAP1, CHD3
## CTSS, CTSW, CD2AP, SKAP1, RNASE6, CD2, KBTBD11, TCF7, THY1, AC008012.1
## MRPL52, BCL2, TXK, ITGA4, ZFP36L1, GIMAP8, GIMAP7, CARD11, GIMAP1-GIMAP5, CCR5
## Negative: HMGB2, AC087721.2, MKI67, KIF11, PRC1, TPX2, BIRC5, SPC25, PCLAF, KIF15
## CDK1, KNL1, CCNA2, CDCA3, CENPE, KIF4A, HMMR, NDC80, SGO2, NUF2
## UBE2C, SPC24, PLK1, RRM2, FBXO5, KIF22, ASF1B, DLGAP5, NCAPG, TACC3
## PC_ 3
## Positive: FN1, S100A4, IFI30, CTSS, PLD4, RASSF4, MS4A6A, LY86, PSAP, F13A1
## KLF4, CTSC, CD302, LRP1, PID1, MCUB, IRF5, NUPR1, NAAA, CST3
## CLEC4C, CX3CR1, CTSB, F10, RND3, RNASE6, OLFM1, PLCB1, CSF1R, PLEKHO1
## Negative: NKG7, ADGRG1, AC008012.1, MYB, ANGPT1, MUC13, CD27, NEDD4, RAB38, CST7
## GIMAP6, KIT, NRGN, ZFPM1, CD34, ADGRL4, BCL2, PHGDH, NME4, GIMAP1
## SOX4, SDSL, SLC22A3, IGFBP4, TSPAN4, IKZF2, CD63, LAT, WDR12, MYCT1
## PC_ 4
## Positive: GIMAP4, GIMAP6, SKAP1, CD2, CTSW, THY1, ABLIM1, TCF7, TXK, IL2RB
## GIMAP7, CD3D, GIMAP8, TRBC1, BCL11B, LEF1, GIMAP1-GIMAP5, KLRD1, SH2D1A, CD226
## GIMAP1, ITK, IL7R, CD247, RASGRP1, LAT, IKZF3, KBTBD11, CD8B2, CD8A
## Negative: CTSG, MPO, SRM, PRTN3, FKBP11, CD34, GSTO1, SYCE2, NME4, SHMT1
## RAB38, MUC13, APEX1, IDH2, CST3, DNPH1, RANGRF, TSPAN4, WDR12, ADGRL4
## ADGRG1, RAMP1, PLAC8, HACD1, HELLS, TMEM97, CDC6, UNG, LMO2, MCRIP2
## PC_ 5
## Positive: COX6A2, LY6D, CD300C, KMO, TCF4, SDC4, CCND1, AC020909.1, ATP1B1, TSC22D1
## PACSIN1, BLNK, UPB1, P2RY14, RUNX2, NET1, RPGRIP1, FCRLA, PAFAH1B3, PDZD4
## HS3ST1, CXXC5, CYB561A3, ATP2A1, SRGAP3, PTPRF, CACNA1E, GET1-SH3BGR, LIFR, PLTP
## Negative: GIMAP4, IFNGR1, S100A4, HOPX, F13A1, TCF7, TXK, SKAP1, CD3D, FN1
## IL2RB, NKG7, CD302, THY1, CD2, GIMAP7, CLEC4C, LRP1, TRBC1, F10
## BCL11B, CTSW, LEF1, NUPR1, GIMAP8, SH2D1A, CTSC, MCUB, CD226, CSF1R
ElbowPlot(merge)
merge <- FindNeighbors(merge, dims = 1:50)
## Computing nearest neighbor graph
## Computing SNN
merge <- FindClusters(merge, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 36805
## Number of edges: 1398308
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9260
## Number of communities: 17
## Elapsed time: 9 seconds
## 1 singletons identified. 16 final clusters.
merge <- RunUMAP(merge, dims = 1:50, n.components = 3)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:33:57 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:33:57 Read 36805 rows and found 50 numeric columns
## 16:33:57 Using Annoy for neighbor search, n_neighbors = 30
## 16:33:57 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:34:03 Writing NN index file to temp file /tmp/RtmppDEUZu/file558f870fa0f24
## 16:34:03 Searching Annoy index using 1 thread, search_k = 3000
## 16:34:17 Annoy recall = 100%
## 16:34:19 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:34:21 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:34:23 Commencing optimization for 200 epochs, with 1709708 positive edges
## 16:34:23 Using rng type: pcg
## 16:34:40 Optimization finished
merge$active.ident = merge$predicted.celltype.l1
p1 <- DimPlot(merge, group.by = "sample")
p2 <- DimPlot(merge, group.by = "active.ident", label = T)
p1+p2
merge@active.ident = as.factor(merge$active.ident)
merge.mono <- subset(merge, subset = predicted.celltype.l2 %in% c("CD14 Mono","CD16 Mono"))
DimPlot(merge.mono, split.by = "sample")
merge.mono <- NormalizeData(merge.mono, normalization.method = "LogNormalize", scale.factor = 1e4)
## Normalizing layer: counts
merge.mono <- FindVariableFeatures(merge.mono, selection.method = "vst", nFeatures = 3000)
## Finding variable features for layer counts
all.genes <- rownames(merge.mono)
merge.mono <- ScaleData(merge.mono, features = all.genes)
## Centering and scaling data matrix
merge.mono <- RunPCA(merge.mono, features = VariableFeatures(merge.mono), genes.print = 25)
## PC_ 1
## Positive: CLEC4D, SLC7A11, BASP1, LTF, THBS1, RSAD2, CD63, EGR1, ACOD1, FCGR3B
## SRGN, GDPD3, MARCKS, PTGS2, OLFM4, HMGB2, ISG15, OASL, TENT5C, HSD11B1
## SOCS3, MARCKSL1, MGST2, CSF1, IFIT3, XPC, ADGRG3, IL1RL1, MMP13, TMEM132D
## Negative: RPL10A, RPL3, RPS18, NME2, RPL36A, RPL32, RACK1, RPS17, RPS5, RPS28
## RPS2, DBI, KRTCAP2, RPL13, MT-ND1, RPL15, AL928654.3, RPS4X, RPLP0, RPS8
## RPS15A, RPL19, RPL8, MT-ND2, EEF1A1, RPLP2, RPS6, PRDX1, RPS15, LGALS1
## PC_ 2
## Positive: TOP2A, RRM2, PCLAF, SMC2, MKI67, STMN1, ESCO2, ASF1B, LIG1, CLSPN
## FCN1, RRM1, TYMS, CDK1, HMGB2, KIF15, TK1, KNL1, PRC1, TPX2
## CCNE2, SPC25, NDC80, SPC24, DTL, CENPH, BIRC5, PBK, SGO2, HMGB3
## Negative: CTSS, AHNAK, S100A4, CCR2, LRP1, CLEC4C, MNDA, CD2AP, PSAP, FN1
## RNASE6, ZEB2, AL928654.3, IFI30, CX3CR1, CCL15, CLEC4A, CD48, F13A1, CD302
## NUPR1, PLEKHO1, IRF5, GNGT2, KLF4, S100A10, SELENOW, MCUB, PTPRO, SLFN5
## PC_ 3
## Positive: GIMAP6, LCK, ABLIM1, SKAP1, CTSW, GIMAP7, CD3E, GIMAP1, IL2RB, THY1
## GIMAP8, TCF7, PTPRCAP, BCL11B, GIMAP1-GIMAP5, LAT, TXK, TRBC1, IL7R, AC008012.1
## PRKCQ, CD226, CD27, KLRD1, SH2D1A, TESPA1, FAM189B, IKZF3, NKG7, GPR174
## Negative: FN1, F13A1, IFI30, C3, F10, CD302, MCUB, CCR2, LRP1, PLCB1
## NAAA, MSR1, HPSE, PLAC8, STXBP6, CCL15, ANXA5, TIFAB, KLF4, NAPSA
## TREM2, ARHGEF37, S100A4, HEXB, LAMP1, DUSP22, GPX1, PTPRO, OLFM1, NUPR1
## PC_ 4
## Positive: SRGN, CLEC4D, MARCKS, LMNB1, ST8SIA4, FOS, ESCO2, CST3, KIF23, PRC1
## TOP2A, SPC25, ZEB2, TMPO, PSAP, PCLAF, NDC80, CALM2, CCNA2, CCNF
## RSAD2, NCAPG, PBK, SLFN5, SHISA5, MIS18BP1, SMC2, CDCA2, TUBA1B, NCAPH
## Negative: LTF, LTA4H, CEBPE, ZMPSTE24, TMEM216, ORM1, MGST2, CD63, FFAR2, C3
## AGPAT2, CD81, ICA1, NDUFV3, PDE4D, PGLS, BAZ1A, FCN1, C1QTNF12, PROM1
## CLEC12A, ATP5PD, SPTBN1, AP3S1, HEXB, COX6C, SELENOF, ATP6V0C, COX7B, HSD11B1
## PC_ 5
## Positive: HMMR, ASPM, UBE2C, TPX2, CENPE, DEPDC1, CDC20, PRR11, BIRC5, KIF20A
## PRC1, CCNA2, SPC25, CDCA3, KNL1, KIF4A, KIF14, CCNB1, RACGAP1, KIF2C
## NUF2, LTF, CCNB2, AURKA, CKAP2, MXD3, CENPA, NDC80, KIF15, CEP55
## Negative: CTSG, MPO, SRGN, DMKN, SYCE2, SRM, CST7, PHGDH, HELLS, MCM2
## CDC6, MCM6, APEX1, CLEC10A, DTL, ZMYND19, GINS2, TMA16, UNG, TIMELESS
## DIO2, NME4, PLPPR3, PSMC3IP, WDR12, C1QBP, DHFR2, METTL1, CHEK1, HACD1
ElbowPlot(merge.mono)
merge.mono <- FindNeighbors(merge.mono, dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
merge.mono <- FindClusters(merge.mono, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 27089
## Number of edges: 939252
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9020
## Number of communities: 15
## Elapsed time: 5 seconds
merge.mono <- RunUMAP(merge.mono, dims = 1:30, n.components = 3)
## 16:36:14 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:36:14 Read 27089 rows and found 30 numeric columns
## 16:36:14 Using Annoy for neighbor search, n_neighbors = 30
## 16:36:14 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:36:17 Writing NN index file to temp file /tmp/RtmppDEUZu/file558f828f9090c
## 16:36:17 Searching Annoy index using 1 thread, search_k = 3000
## 16:36:26 Annoy recall = 100%
## 16:36:27 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:36:29 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:36:30 Commencing optimization for 200 epochs, with 1250694 positive edges
## 16:36:30 Using rng type: pcg
## 16:36:43 Optimization finished
p1 <- DimPlot(merge.mono, group.by = "sample")
p2 <- DimPlot(merge.mono, group.by = "predicted.celltype.l2", label = T)
p1+p2
The idea here is to observe the differences caused by CXCR2 inhibitors with BTZ/DEX versus just BTZ/DEX and control.
merge.neuts = subset(merge.mono, subset = seurat_clusters %in% c(0,1,2,5,9))
DimPlot(merge.neuts, split.by = "sample")
merge.neuts <- NormalizeData(merge.neuts, normalization.method = "LogNormalize", scale.factor = 1e4)
## Normalizing layer: counts
merge.neuts <- FindVariableFeatures(merge.neuts, selection.method = "vst", nFeatures = 3000)
## Finding variable features for layer counts
all.genes <- rownames(merge.neuts)
merge.neuts <- ScaleData(merge.neuts, features = all.genes)
## Centering and scaling data matrix
merge.neuts <- RunPCA(merge.neuts, features = VariableFeatures(merge.neuts), genes.print = 25)
## PC_ 1
## Positive: SRGN, CLEC4D, FTH1, STK17B, TPD52, CD300LF, SLPI, MARCKS, CCR1, IL1B
## DUSP1, IL36G, CD84, TSPAN13, EMILIN2, RGS2, TXNIP, ST8SIA4, SEPHS2, DHRS9
## TPM4, PIM1, RESF1, IFITM3, FOS, ZFP36L2, GPCPD1, GCNT2, NLRP3, GM2A
## Negative: CAMP, SERPINB1, CYBB, ANXA1, LCN2, DSTN, GOLIM4, ZMPSTE24, ORM1, ANXA3
## ADPGK, ABCA13, TMEM216, C3, INHBA, ALDH2, MGST2, TKT, TECR, FMNL2
## SCP2, FCN1, FFAR2, CD63, CD81, RHOU, LBP, ETS1, HEXB, NCOA4
## PC_ 2
## Positive: MMP8, FPR1, ADPGK, LCN2, LIMS3, CAPG, ANXA1, CEACAM8, PRDX5, MAP4K4
## DSTN, SRI, VIM, TKT, CYBB, SAMD9L, DHRS9, PNPLA8, ALDH2, KMT5A
## ITGAM, NT5E, HOPX, MYL6, NCOA4, CHIA, RASA2, NIN, GOLIM4, PROK2
## Negative: IL1B, ITGA4, FCN1, GM2A, MARCKS, ORM1, HSPA5, CD244, PRR5L, EGR1
## TSPAN13, TAGLN2, IL13RA1, SLFN5, RPL8, PPP1R15A, CSF1, FOS, PTGS2, ST8SIA4
## TMEM216, CLEC6A, ARHGAP5, CTSS, RGS2, GCNT2, XBP1, DUSP1, RPS8, SOCS3
## PC_ 3
## Positive: FCN1, ORM1, TMEM216, MS4A3, RPL35A, RPS17, MT-ND1, TMEM256, KRTCAP2, GATM
## SELENOS, UPF3B, DBI, PTGR1, MANF, RPL10A, RPS19, IGFBP4, RPL3, RPL28
## SPTSSA, LARP1B, CDCA7L, MT-ND2, PDE4D, RPS28, RPS8, S100A1, TIMM13, MRPL58
## Negative: IFIT3, SLFN5, STAT1, IRF7, CMPK2, SP140, PARP14, XAF1, RTP4, OASL
## AL353671.1, USP41, SAMD9L, ZBP1, HERC6, OAS3, ISG20, EIF2AK2, IRGM, CTSS
## PARP9, MAP4K4, ITGAM, STAT2, MYCBP2, MMP8, PTTG2, BST2, PLAC8, MACF1
## PC_ 4
## Positive: IFIT3, SLFN5, XAF1, OASL, USP41, RTP4, HERC6, IRF7, ZBP1, CMPK2
## ISG20, IRGM, OAS3, STAT1, EIF2AK2, BST2, AL353671.1, PARP14, SP140, STAT2
## MITD1, IFIT1B, PARP9, PLAC8, CD274, ORM1, CTSS, FCN1, UBE2L6, PTTG2
## Negative: ITGAM, NIN, MACF1, MAP4K4, ZNF106, CHD4, TXNIP, ATRX, NABP1, SETX
## KCTD12, TPM4, DHRS9, ANKRD33B, RHOB, ZFP36L2, HOOK3, PHACTR2, GPCPD1, STK17B
## ITGA4, UTRN, ATP2B1, ALCAM, TPD52, MMP8, EMILIN2, ST8SIA4, CD84, TRPS1
## PC_ 5
## Positive: FCN1, NAMPT, EID1, MOV10, CD84, IL36G, CLEC4D, DHRS9, NIN, SEPHS2
## MS4A3, TKT, ORM1, AL603832.3, HOPX, ZNF106, GATM, CAMK1D, FPR1, CDCA7L
## AMER2, PTGR1, TRPS1, SELENOP, STK17B, EMILIN2, SRGN, SFMBT2, TNFSF13B, ANKRD33B
## Negative: CLEC6A, THBS1, CSF1, GM2A, PPP1R15A, NR4A1, IGFBP6, ITGA4, OTULINL, CD244
## TGM2, EGR1, ATF3, CAMP, IL1B, ARHGAP5, PTGS2, IL1RAP, ITGAX, TP53INP2
## AC005154.5, STEAP4, GOLIM4, NRP1, CHIA, JUN, ABCA13, PLAC8, PRR5L, INHBA
ElbowPlot(merge.neuts)
merge.neuts <- FindNeighbors(merge.neuts, dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
merge.neuts <- FindClusters(merge.neuts, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 19796
## Number of edges: 567629
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8487
## Number of communities: 7
## Elapsed time: 2 seconds
## 1 singletons identified. 6 final clusters.
merge.neuts <- RunUMAP(merge.neuts, dims = 1:30, n.components = 3)
## 16:37:55 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:37:55 Read 19796 rows and found 30 numeric columns
## 16:37:55 Using Annoy for neighbor search, n_neighbors = 30
## 16:37:55 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:37:58 Writing NN index file to temp file /tmp/RtmppDEUZu/file558f83f394867
## 16:37:58 Searching Annoy index using 1 thread, search_k = 3000
## 16:38:04 Annoy recall = 100%
## 16:38:05 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:38:07 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:38:08 Commencing optimization for 200 epochs, with 926446 positive edges
## 16:38:08 Using rng type: pcg
## 16:38:17 Optimization finished
p1 <- DimPlot(merge.neuts, group.by = "sample")
p2 <- DimPlot(merge.neuts, , label = T)
p1+p2
Here I am describing the immature and immature neutrophil populations
I am adding the
Pre-neutrophil
&
Mature neutrophil scores to grasp the dynamics of the cell populations we’ve captured
preneu.feats.mus = read.delim(file = "/samurlab1/Joshua/Neutrophil_outs/preneugenes.txt", sep ="")
preneu.feats.mus = preneu.feats.mus$x
musGenes = preneu.feats.mus
mouse_human_genes = read.csv("http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",sep="\t")
convert_mouse_to_human <- function(gene_list){
output = c()
for(gene in gene_list){
class_key = (mouse_human_genes %>% filter(Symbol == gene & Common.Organism.Name=="mouse, laboratory"))[['DB.Class.Key']]
if(!identical(class_key, integer(0)) ){
human_genes = (mouse_human_genes %>% filter(DB.Class.Key == class_key & Common.Organism.Name=="human"))[,"Symbol"]
for(human_gene in human_genes){
output = append(output,human_gene)
}
}
}
return (output)
}
preneu.homo = convert_mouse_to_human(as.vector(preneu.feats.mus))
merge.neuts = AddModuleScore(merge.neuts, features = list(preneu.homo), name = "preneu.sig")
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The following features are not present in the object: SLC25A10,
## ARHGAP19, TRIM59, LDHB, GPR84, RTEL1, CMC2, ZFP41, HMGN2, DTNB, PMF1-BGLAP,
## ELDR, ARHGAP11A, C1orf226, not searching for symbol synonyms
mat_neut_sig = read.table(file = "/samurlab1/Joshua/Neutrophil_outs/mature_neut_sig.txt", sep = "\t")
mat_neut_sig = as.list(mat_neut_sig)
merge.neuts = AddModuleScore(merge.neuts, features = mat_neut_sig, name = "mat_neut_sig")
## Warning: The following features are not present in the object: RETNLG, CCL6,
## PRR13, BTG1, CXCR2, FXYD5, H2-D1, FTL1, MAP1LC3B, CYP4F18, IL1F9, AMICA1,
## IFI27L2A, STFA2L1, CXCL2, GM5483, IFITM1, not searching for symbol synonyms
Score visualization
p1 = DimPlot(merge.neuts, label = T)
p2 = RidgePlot(merge.neuts, features = "preneu.sig1", group.by = "seurat_clusters")
p3 = RidgePlot(merge.neuts, features = "mat_neut_sig1", group.by = "seurat_clusters")
p1+p2+p3
## Picking joint bandwidth of 0.00566
## Picking joint bandwidth of 0.0483
merge.neuts.current.cluster.ids = c(0,1,2,3,4,5)
merge.neuts.new.cluster.ids =c("int mature", "mature","int immature","immature","mature","immature")
names(merge.neuts.new.cluster.ids) = levels(merge.neuts)
merge.neuts = RenameIdents(merge.neuts, merge.neuts.new.cluster.ids)
DimPlot(merge.neuts, reduction="umap", label=TRUE, pt.size=.75, label.size = 5)
Data visualisation for confidence in data
# Mouse neutrophils
SCpubr::do_FeaturePlot(merge.neuts, features = "MKI67", pt.size = 0.75,plot.title = "MKI67")
SCpubr::do_FeaturePlot(merge.neuts, features = "LTF", pt.size = 0.75, plot.title = "LTF")
SCpubr::do_FeaturePlot(merge.neuts, features = "CAMP", pt.size = 0.75, plot.title = "CAMP")
SCpubr::do_FeaturePlot(merge.neuts, features = "MMP9", pt.size = 0.75, plot.title = "MMP9")
SCpubr::do_FeaturePlot(merge.neuts, features = "FCGR3B", pt.size = 0.75, plot.title = "FCGR3B")
SCpubr::do_FeaturePlot(merge.neuts, features = "VEGFA", pt.size = 0.75, plot.title = "VEGFA")
merge.neuts$active.ident = merge.neuts@active.ident
meta = merge.neuts@meta.data
plot_data <- meta %>%
group_by(sample, active.ident) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(sample) %>%
mutate(proportion = count / sum(count)) %>%
mutate(label = paste0(round(proportion * 100, 1), "%"))
ggplot(plot_data, aes(x = sample, y = proportion, fill = active.ident)) +
geom_bar(stat = "identity", position = "stack") +
geom_text(aes(label = label),
position = position_stack(vjust = 0.5),
size = 3, color = "black") +
theme_classic() +
labs(
title = "Proportional Representation of Neutrophils",
x = "Dataset",
y = "Proportion of Cells",
fill = "Active Ident"
) +
scale_y_continuous(labels = scales::percent_format()) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5, face = "bold"),
legend.position = "right"
)
merge@active.ident = as.factor(merge$active.ident)
merge.else <- subset(merge, subset = predicted.celltype.l1 %in% c("B","CD4 T","CD8 T","DC","HSPC","NK","other T","other"))
DimPlot(merge.else, split.by = "sample")
merge.else <- NormalizeData(merge.else, normalization.method = "LogNormalize", scale.factor = 1e4)
## Normalizing layer: counts
merge.else <- FindVariableFeatures(merge.else, selection.method = "vst", nFeatures = 3000)
## Finding variable features for layer counts
all.genes <- rownames(merge.else)
merge.else <- ScaleData(merge.else, features = all.genes)
## Centering and scaling data matrix
merge.else <- RunPCA(merge.else, features = VariableFeatures(merge.else), genes.print = 25)
## PC_ 1
## Positive: EEF1G, PTPRCAP, HSP90AB1, HSPE1, RPS2, NME2, PDCD4, AC008012.1, PPP1R14B, MIF
## SLC25A4, DDX21, MRPL52, SRGN, TSPAN13, NCL, FBL, BZW2, C1QBP, PARP1
## NOP10, MSI2, LGALS1, S100A10, MEF2C, GIMAP6, CHD3, WDR43, CALR, REXO2
## Negative: S100A8, CD177, LCN2, CAMP, ANXA1, C3, LTF, HP, MCEMP1, RFLNB
## PGLYRP1, LRG1, FCN1, ITGAM, CKAP4, CHI3L1, PRDX5, PYGL, CLEC5A, ORM1
## AC087721.2, INHBA, ADPGK, SYNE1, ARHGAP19-SLIT1, NCAM1, MEGF9, CYBB, GCA, UBE2C
## PC_ 2
## Positive: ATP5IF1, DUT, ETFB, RANBP1, SYCE2, EIF5AL1, CMTM7, MPO, MCM3, DCTPP1
## DBI, YBX3, ELANE, CDCA7, DTYMK, GSTO1, PGAM1, SLC25A5, MCM7, PLAC8
## IMPDH2, PA2G4, TPI1, SELENOH, IGFBP4, NASP, NHP2, POLA1, RRM1, MCM5
## Negative: LTB, ABLIM1, IL7R, GIMAP6, GIMAP4, CD3D, CD3G, SKAP1, LCK, CD3E
## KLRD1, TCF7, THY1, IKZF3, GIMAP1, TXK, CTSW, ITK, GIMAP8, LEF1
## RASGRP1, BCL11B, KBTBD11, GIMAP1-GIMAP5, IL2RB, PTPRCAP, TRBC1, CD8B2, PRKCQ, SLAMF6
## PC_ 3
## Positive: PSAP, PLD4, CTSS, CTSH, LY86, IFI30, RASSF4, RND3, TPD52, KLF4
## CCR2, AHNAK, RNASE6, TIFAB, MS4A6A, IRF5, SCPEP1, CST3, S100A4, FN1
## CCDC88A, LGALS1, F13A1, S100A6, LGALS3, CTSC, UPB1, CD2AP, NRP1, TGFBI
## Negative: NKG7, CD63, VAMP5, NEDD4, CD81, ADGRG3, CDK6, AC008012.1, ADGRG1, MYB
## PDE4D, TSPAN32, DACH1, RGCC, MS4A3, CA2, MUC13, SYNE1, CST7, SRGN
## PGLYRP1, ERG, CD27, IGFBP4, KDM5B, SDSL, ZFPM1, SERPINB1, TMEM40, ADGRL4
## PC_ 4
## Positive: TCF4, COX6A2, CYB561A3, RUNX2, NUCB2, TSC22D1, LY6D, KMO, LAIR1, CCND1
## SDC4, P2RY14, SMIM5, PLTP, ATP1B1, LIFR, LRP8, AC020909.1, MEF2C, PACSIN1
## BLNK, FMNL2, PAQR5, MCTP2, RNASE6, PAFAH1B3, RPGRIP1, PTPRS, UPB1, NET1
## Negative: HOPX, EMB, S100A10, S100A4, GIMAP4, CD3D, F13A1, CD3G, AL928654.3, LCK
## CD3E, FN1, SKAP1, TCF7, TXK, ARL4C, CCR2, TGFBI, THY1, IFNGR1
## IL2RB, SMPDL3A, CTSW, TRBC1, BCL11B, LEF1, CTSC, NAAA, MCUB, GIMAP8
## PC_ 5
## Positive: MGST2, RGCC, ELANE, HSD11B1, CD3D, ABLIM1, LTA4H, KLRD1, CD3G, TCF7
## LCK, CHD7, IGHM, GIMAP4, CD3E, CD8B2, LEF1, GATM, CTSW, ALAS1
## CD8A, TXK, KBTBD11, CLDN15, SLAMF6, TFRC, GRAMD2B, RFLNB, PDE2A, DMKN
## Negative: GATA2, CPA3, CSRP3, SLC18A2, CYP11A1, ITGA2B, HACD4, EMILIN2, STX3, CCL3L3
## CSF1, TBC1D4, SLC7A8, RGS1, AQP9, IL1RL1, IL6, RNASE12, CDH1, DAPP1
## CCL15, MYO1D, APOE, CCR1, NEDD4, IGFBP7, LILRB3, ADGRG1, IER3, CEBPA
ElbowPlot(merge.else)
merge.else <- FindNeighbors(merge.else, dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
merge.else <- FindClusters(merge.else, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10290
## Number of edges: 359982
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9373
## Number of communities: 18
## Elapsed time: 0 seconds
merge.else <- RunUMAP(merge.else, dims = 1:30, n.components = 3)
## 16:39:36 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:39:36 Read 10290 rows and found 30 numeric columns
## 16:39:36 Using Annoy for neighbor search, n_neighbors = 30
## 16:39:36 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:39:38 Writing NN index file to temp file /tmp/RtmppDEUZu/file558f8381bb28e
## 16:39:38 Searching Annoy index using 1 thread, search_k = 3000
## 16:39:40 Annoy recall = 100%
## 16:39:41 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:39:43 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:39:43 Commencing optimization for 200 epochs, with 414590 positive edges
## 16:39:43 Using rng type: pcg
## 16:39:48 Optimization finished
p1 <- DimPlot(merge.else, group.by = "sample")
p2 <- DimPlot(merge.else, group.by = "predicted.celltype.l2", label = T)
p1+p2
cell_type_colors <- c(
"B" = "#e31a1c",
"CD4 T" = "#1f78b4",
"CD8 T" = "#ffb000",
"DC" = "#00441b",
"HSPC" = "#1cd3a2",
"NK" = "#6a3d9a",
"other" = "#b15928",
"other T" = "#33a02c"
)
merge.else$active.ident <- factor(merge.else$active.ident, levels = names(cell_type_colors))
meta = merge.else@meta.data
plot_data <- meta %>%
group_by(sample, active.ident) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(sample) %>%
mutate(proportion = count / sum(count)) %>%
mutate(label = paste0(round(proportion * 100, 1), "%")) # Create label with 1 decimal place
# Step 2: Plot with text labels
ggplot(plot_data, aes(x = sample, y = proportion, fill = active.ident)) +
geom_bar(stat = "identity", position = "stack") +
geom_text(aes(label = label),
position = position_stack(vjust = 0.5),
size = 3, color = "black") +
theme_classic() +
labs(
title = "Proportional Representation of Neutrophils",
x = "Dataset",
y = "Proportion of Cells",
fill = "Active Ident"
) +
scale_y_continuous(labels = scales::percent_format()) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5, face = "bold"),
legend.position = "right"
)
SCpubr::do_DimPlot(merge.else, group.by = "active.ident",pt.size = 0.2)